The Retail Data Analytics dataset is available on Kaggle and provides historical sales data for 45 stores located in different regions; each store contains a number of departments.

Stores

Anonymized information about the 45 stores, indicating the type and size of store

Features

Contains additional data related to the store, department, and regional activity for the given dates.

Sales

Historical sales data covering 2010-02-05 to 2012-11-01. Within this tab you will find the following fields: Store, Dept, Date, Weekly_Sales, and IsHoliday.

Load libraries

Read dataset

# Weekly sales per store and department; Date is still plain text at this point.
sales <- read_csv(file = "retail-data-analytics/sales data-set.csv")
## Parsed with column specification:
## cols(
##   Store = col_double(),
##   Dept = col_double(),
##   Date = col_character(),
##   Weekly_Sales = col_double(),
##   IsHoliday = col_logical()
## )
# Per-store, per-date context: temperature, fuel price, markdowns, CPI,
# unemployment and the holiday flag.
features <- read_csv(file = "retail-data-analytics/Features data set.csv")
## Parsed with column specification:
## cols(
##   Store = col_double(),
##   Date = col_character(),
##   Temperature = col_double(),
##   Fuel_Price = col_double(),
##   MarkDown1 = col_double(),
##   MarkDown2 = col_double(),
##   MarkDown3 = col_double(),
##   MarkDown4 = col_double(),
##   MarkDown5 = col_double(),
##   CPI = col_double(),
##   Unemployment = col_double(),
##   IsHoliday = col_logical()
## )
# Store metadata: one Type and one Size value per store.
stores <- read_csv(file = "retail-data-analytics/stores data-set.csv")
## Parsed with column specification:
## cols(
##   Store = col_double(),
##   Type = col_character(),
##   Size = col_double()
## )

Merging Datasets

# Build the analysis table in two joins: first attach each store's Type and
# Size, then the features recorded for the same store and date. IsHoliday is
# part of the second key so the shared column is matched rather than duplicated.
sales_complete <- sales %>%
  merge(stores, by = "Store") %>%
  merge(features, by = c("Store", "Date", "IsHoliday"))

# Preview the first rows of the merged table.
head(sales_complete)
##   Store       Date IsHoliday Dept Weekly_Sales Type   Size Temperature
## 1     1 01/04/2011     FALSE   49     13167.85    A 151315       59.17
## 2     1 01/04/2011     FALSE   26      5946.53    A 151315       59.17
## 3     1 01/04/2011     FALSE   81     28545.23    A 151315       59.17
## 4     1 01/04/2011     FALSE   34      9949.54    A 151315       59.17
## 5     1 01/04/2011     FALSE   59       316.86    A 151315       59.17
## 6     1 01/04/2011     FALSE   30      3897.48    A 151315       59.17
##   Fuel_Price MarkDown1 MarkDown2 MarkDown3 MarkDown4 MarkDown5      CPI
## 1      3.524        NA        NA        NA        NA        NA 214.8372
## 2      3.524        NA        NA        NA        NA        NA 214.8372
## 3      3.524        NA        NA        NA        NA        NA 214.8372
## 4      3.524        NA        NA        NA        NA        NA 214.8372
## 5      3.524        NA        NA        NA        NA        NA 214.8372
## 6      3.524        NA        NA        NA        NA        NA 214.8372
##   Unemployment
## 1        7.682
## 2        7.682
## 3        7.682
## 4        7.682
## 5        7.682
## 6        7.682

Data Processing

## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 421570 obs. of  5 variables:
##  $ Store       : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ Dept        : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ Date        : Date, format: "2010-02-05" "2010-02-12" ...
##  $ Weekly_Sales: num  24925 46039 41596 19404 21828 ...
##  $ IsHoliday   : num  0 1 0 0 0 0 0 0 0 0 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Store = col_double(),
##   ..   Dept = col_double(),
##   ..   Date = col_character(),
##   ..   Weekly_Sales = col_double(),
##   ..   IsHoliday = col_logical()
##   .. )

Subsetting

# Keep only stores 1 through 5, for both the merged table and the raw sales.
sales_complete_subset <- sales_complete[
  with(sales_complete, which(Store >= 1 & Store <= 5)),
]
sales_subset <- sales[with(sales, which(Store >= 1 & Store <= 5)), ]

# Single-series slices of store 1 (departments 1 and 2).
sales_store1_dept1 <- subset(sales_subset, Store == 1 & Dept == 1)
sales_store1_dept2 <- subset(sales_subset, Store == 1 & Dept == 2)

# Department 1 across every store retained above.
sales_stores1to5_dept1 <- subset(sales_subset, Dept == 1)
# Box plot of log weekly sales, one box per store Type.
# log() is undefined for zero or negative Weekly_Sales; those rows used to turn
# into NaN/-Inf and were dropped by plotly with a warning. Filter them out
# explicitly so the exclusion is deliberate and the chunk runs cleanly.
p <- plot_ly(
  subset(sales_complete, Weekly_Sales > 0),
  x = ~log(Weekly_Sales),
  color = ~Type,
  type = "box"
)
p
# Box plot of weekly sales, one box per store.
# Store is numeric, and a numeric colour mapping is treated as a continuous
# scale that box traces cannot draw (the source of the line.color / fillcolor
# warnings). Mapping it as a factor gives each store its own trace and colour.
p <- plot_ly(
  sales_subset,
  x = ~Weekly_Sales,
  color = ~factor(Store),
  type = "box"
)
p
# Weekly sales over time for store 1, department 1. No trace type or mode is
# given here, so plotly picks them itself.
plot_ly(data = sales_store1_dept1, x = ~Date, y = ~Weekly_Sales)
## No trace type specified:
##   Based on info supplied, a 'scatter' trace seems appropriate.
##   Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
##   Setting the mode to markers
##   Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
# Same series drawn as a line chart, with an explicit title.
layout(
  plot_ly(
    data = sales_store1_dept1,
    x = ~Date,
    y = ~Weekly_Sales,
    type = "scatter",
    mode = "lines"
  ),
  title = "Weekly Sales: Store 1 and Dept 1"
)
# One line chart per department of store 1, then both combined with subplot().
p1 <- add_lines(
  plot_ly(data = sales_store1_dept1, x = ~Date, y = ~Weekly_Sales),
  name = "Store 1 Dept 1"
)
p2 <- add_lines(
  plot_ly(data = sales_store1_dept2, x = ~Date, y = ~Weekly_Sales),
  name = "Store 1 Dept 2"
)
subplot(p1, p2)

Before we use the subplot function, we need to change our data format from long to wide using the function spread.

# Long -> wide: one Weekly_Sales column per store (department 1 only).
sales_store1_dept1and2_wide <- spread(
  sales_stores1to5_dept1,
  key = Store,
  value = Weekly_Sales
)

# Keep just the date and the five per-store columns needed for plotting.
sales_store1_dept1and2_wide <-
  sales_store1_dept1and2_wide[c("Date", as.character(1:5))]

# Give the store columns readable names: Store1 ... Store5.
names(sales_store1_dept1and2_wide) <- c("Date", paste0("Store", 1:5))

# Every column except Date becomes its own line chart.
vars <- setdiff(names(sales_store1_dept1and2_wide), "Date")
plots <- lapply(vars, function(store_col) {
  # Build the y mapping (e.g. ~Store1) from the column name.
  y_mapping <- as.formula(paste0("~", store_col))
  add_lines(
    plot_ly(sales_store1_dept1and2_wide, x = ~Date, y = y_mapping),
    name = store_col
  )
})

# Stack the charts vertically, one row per store, on a shared date axis.
subplot(plots, nrows = length(plots), shareX = TRUE, titleX = FALSE)

Now it is your turn to create a beautiful HTML report from the provided dataset in R Markdown. Include in your report at least one of each of the following:

  • Image
  • GIF
  • text
  • plots
  • tables
  • hyperlinks
  • titles
  • Other things you want

Happy Victoria Day!!!

This presentation was based on